In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import cv2
import torch
from torch.utils.data import Dataset, DataLoader,random_split,Subset, SubsetRandomSampler
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms
from sklearn.model_selection import KFold
import torchvision
import os
import lightning as L
from lightning.pytorch.loggers import TensorBoardLogger
from lightning.pytorch.callbacks import ModelCheckpoint, EarlyStopping, LearningRateMonitor, Callback
from torchvision.datasets import ImageFolder
from torchvision import datasets
from torchvision.datasets.folder import default_loader
from torch.utils.data import ConcatDataset
from sklearn.model_selection import train_test_split
In [2]:
seed = 44444
np.random.seed(seed)
Data Exploration¶
In [3]:
data = pd.read_csv('birds.csv')
data.head()
Out[3]:
| class id | filepaths | labels | data set | scientific name | |
|---|---|---|---|---|---|
| 0 | 0.0 | train/ABBOTTS BABBLER/001.jpg | ABBOTTS BABBLER | train | MALACOCINCLA ABBOTTI |
| 1 | 0.0 | train/ABBOTTS BABBLER/007.jpg | ABBOTTS BABBLER | train | MALACOCINCLA ABBOTTI |
| 2 | 0.0 | train/ABBOTTS BABBLER/008.jpg | ABBOTTS BABBLER | train | MALACOCINCLA ABBOTTI |
| 3 | 0.0 | train/ABBOTTS BABBLER/009.jpg | ABBOTTS BABBLER | train | MALACOCINCLA ABBOTTI |
| 4 | 0.0 | train/ABBOTTS BABBLER/002.jpg | ABBOTTS BABBLER | train | MALACOCINCLA ABBOTTI |
In [4]:
path = data.loc[0, 'filepaths']
sample_img = cv2.imread(path)
print(f"Dataset shape: {data.shape}")
print(f"Image shape: {sample_img.shape}")
print(f"number of classes: {len(data['labels'].unique())}")
Dataset shape: (89885, 5) Image shape: (224, 224, 3) number of classes: 525
In [5]:
split = data['labels'].value_counts()
print(f"mean: {np.mean(split.values)}")
print(f"std: {np.std(split.values)}")
print(f"min: {np.min(split.values)}")
print(f"max: {np.max(split.values)}")
# plot hist of the split
mean: 171.2095238095238 std: 20.60193945474678 min: 140 max: 273
In [6]:
plt.figure(figsize=(20, 10))
plt.bar(split.index, split.values)
plt.show()
In [7]:
# Assuming 'data' has 'filepaths' and 'labels'
unique_labels = np.unique(data['labels'])
selected_labels = np.random.choice(unique_labels, size=16, replace=False)
# Select one image for each label
selected_images = []
for label in selected_labels:
images_for_label = [data['filepaths'][i] for i, l in enumerate(data['labels']) if l == label]
selected_image = np.random.choice(images_for_label)
selected_images.append((selected_image, label))
# Plotting
fig, axes = plt.subplots(nrows=4, ncols=4, figsize=(15, 15), subplot_kw={'xticks': [], 'yticks': []})
for ax, (img_path, label) in zip(axes.flat, selected_images):
img = cv2.imread(img_path)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) # Convert from BGR to RGB
ax.imshow(img)
ax.set_title(label, fontsize=10)
plt.tight_layout()
plt.show()
In [8]:
train_transform = transforms.Compose([
transforms.Resize((224, 224)),
transforms.RandomHorizontalFlip(),
transforms.RandomRotation(10),
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
])
test_transform = transforms.Compose([
transforms.Resize((224, 224)),
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
])
simple_transform = transforms.Compose([
transforms.Resize((224, 224)),
transforms.ToTensor(),
transforms.ConvertImageDtype(torch.float32)
])
In [9]:
torch.backends.mps.is_available()
Out[9]:
True
Ploting histogram to see the distribution of the labels¶
In [10]:
def hist_loader(loader, title):
all_labels = []
for images, labels in loader:
all_labels.extend(labels.numpy())
plt.figure(figsize=(10, 5))
plt.hist(all_labels, bins=range(min(all_labels), max(all_labels) + 2), alpha=0.7, rwidth=0.85, align='left')
plt.xlabel('Label')
plt.ylabel('Frequency')
plt.title(title)
plt.show()
In [11]:
# Initialize the model using hyperparameters
hyperparameters = {
"learning_rate": 0.0001,
"batch_size": 32,
"input_size": (3, 224, 224),
"number_of_classes": 525,
"num_epochs": 10,
"use_batch_norm_list":True,
"device": 'mps' if torch.backends.mps.is_available() else 'cpu',
"seed": seed
}
Simple CNN model with 3 convolutional layers and 1 fully connected layer¶
In [12]:
class SimpleBirdsCNN(L.LightningModule):
def __init__(self, hyperparameters):
super().__init__()
# Initialize the model
self.hyperparameters = hyperparameters
# Convolutional layers
self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, padding=1)
self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
# batch norm layers
self.batch_norm1 = nn.BatchNorm2d(16)
self.batch_norm2 = nn.BatchNorm2d(32)
self.batch_norm3 = nn.BatchNorm2d(64)
# Pooling layers
self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
# Fully connected layers
self.fc1 = nn.Linear(in_features=64 * 7 * 7, out_features=1024)
# Output layer
self.out = nn.Linear(in_features=1024, out_features=self.hyperparameters["number_of_classes"])
# Dropout layer (added in training)
self.dropout = nn.Dropout(p=0.5)
def forward(self, x):
# Convolutional layers
x = self.conv1(x)
x = self.batch_norm1(x)
x = F.relu(x)
x = self.pool(x)
x = self.conv2(x)
x = self.batch_norm2(x)
x = F.relu(x)
x = self.pool(x)
x = self.conv3(x)
x = self.batch_norm3(x)
x = F.relu(x)
x = self.pool(x)
x = self.pool(x)
x = self.pool(x)
# Flatten
x = x.view(-1, 64 * 7 * 7)
# Fully connected layers
x = self.fc1(x)
x = F.relu(x)
x = self.dropout(x)
# Output layer
x = self.out(x)
return x
def training_step(self, batch, batch_idx):
# Load images and labels
images, labels = batch
# Forward pass
outputs = self(images)
# Calculate loss
loss = F.cross_entropy(outputs, labels)
# Calculate accuracy
_, predicted = torch.max(outputs, 1)
correct = (predicted == labels).sum().item()
accuracy = correct / len(labels)
# Log loss
self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
self.log('train_acc', accuracy, on_step=True, on_epoch=True, prog_bar=True, logger=True)
return loss
def validation_step(self, batch, batch_idx):
# Load images and labels
images, labels = batch
# Forward pass
outputs = self(images)
# Calculate loss
loss = F.cross_entropy(outputs, labels)
# Calculate accuracy
_, predicted = torch.max(outputs, 1)
correct = (predicted == labels).sum().item()
accuracy = correct / len(labels)
# Log loss and accuracy
self.log('val_loss', loss, on_epoch=True, prog_bar=True, logger=True)
self.log('val_acc', accuracy, on_epoch=True, prog_bar=True, logger=True)
return loss
def test_step(self, batch, batch_idx):
images, labels = batch
outputs = self(images)
loss = F.cross_entropy(outputs, labels)
_, predicted = torch.max(outputs, 1)
correct = (predicted == labels).sum().item()
accuracy = correct / len(labels)
self.log('test_loss', loss, on_epoch=True, prog_bar=True, logger=True)
self.log('test_acc', accuracy, on_epoch=True, prog_bar=True, logger=True)
return loss
def configure_optimizers(self):
# final model will include l2 regularization
optimizer = torch.optim.Adam(self.parameters(), lr=self.hyperparameters["learning_rate"])
return optimizer
BirdsCNN model with 5 convolutional layers and 1 fully connected layer¶
In [13]:
class BirdsCNN(L.LightningModule):
def __init__(self, hyperparameters):
super().__init__()
self.hyperparameters = hyperparameters
# Convolutional layers
self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, padding=1)
self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
self.conv4 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
self.conv5 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1)
# batch norm layers
self.batch_norm1 = nn.BatchNorm2d(16)
self.batch_norm2 = nn.BatchNorm2d(32)
self.batch_norm3 = nn.BatchNorm2d(64)
self.batch_norm4 = nn.BatchNorm2d(128)
self.batch_norm5 = nn.BatchNorm2d(256)
# Pooling layers
self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
# Fully connected layers
self.fc1 = nn.Linear(in_features=256 * 7 * 7, out_features=1024)
# Output layer
self.out = nn.Linear(in_features=1024, out_features=self.hyperparameters["number_of_classes"])
# Dropout layer (added in training)
self.dropout1 = nn.Dropout(p=0.3)
self.dropout2 = nn.Dropout(p=0.5)
def forward(self, x):
# Convolutional layers
x = self.conv1(x)
x = self.batch_norm1(x)
x = F.relu(x)
x = self.pool(x)
x = self.conv2(x)
x = self.batch_norm2(x)
x = F.relu(x)
x = self.pool(x)
x = self.conv3(x)
x = self.batch_norm3(x)
x = F.relu(x)
x = self.pool(x)
x = self.conv4(x)
x = self.batch_norm4(x)
x = F.relu(x)
x = self.pool(x)
x = self.conv5(x)
x = self.batch_norm5(x)
x = F.relu(x)
x = self.pool(x)
x = self.dropout1(x)
# Flatten
x = x.view(-1, 256 * 7 * 7)
# Fully connected layers
x = self.fc1(x)
x = F.relu(x)
x = self.dropout2(x)
# Output layer
x = self.out(x)
return x
def training_step(self, batch, batch_idx):
# Load images and labels
images, labels = batch
# Forward pass
outputs = self.forward(images)
# Calculate loss
loss = F.cross_entropy(outputs, labels)
# Calculate accuracy
_, predicted = torch.max(outputs, 1)
correct = (predicted == labels).sum().item()
accuracy = correct / len(labels)
# Log loss
self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
self.log('train_acc', accuracy, on_step=True, on_epoch=True, prog_bar=True, logger=True)
return loss
def validation_step(self, batch, batch_idx):
# Load images and labels
images, labels = batch
# Forward pass
outputs = self.forward(images)
# Calculate loss
loss = F.cross_entropy(outputs, labels)
# Calculate accuracy
_, predicted = torch.max(outputs, 1)
correct = (predicted == labels).sum().item()
accuracy = correct / len(labels)
# Log loss and accuracy
self.log('val_loss', loss, on_epoch=True, prog_bar=True, logger=True)
self.log('val_acc', accuracy, on_epoch=True, prog_bar=True, logger=True)
return loss
def test_step(self, batch, batch_idx):
images, labels = batch
outputs = self.forward(images)
loss = F.cross_entropy(outputs, labels)
_, predicted = torch.max(outputs, 1)
correct = (predicted == labels).sum().item()
accuracy = correct / len(labels)
self.log('test_loss', loss, on_epoch=True, prog_bar=True, logger=True)
self.log('test_acc', accuracy, on_epoch=True, prog_bar=True, logger=True)
return loss
def configure_optimizers(self):
optimizer = torch.optim.Adam(self.parameters(), lr=self.hyperparameters["learning_rate"], weight_decay=1e-5)
return optimizer
K-fold cross validation¶
In [14]:
def k_fold_cross_validation(train_dataset, test_dataset ,batch_size ,k=5, tensorboard_name="default"):
kfold = KFold(n_splits=k, shuffle=True, random_state=seed)
test_loader = DataLoader(test_dataset, batch_size=batch_size, pin_memory=True, shuffle=False)
models = []
for fold, (train_ids, valid_ids) in enumerate(kfold.split(train_dataset)):
# Split the dataset
print(f'FOLD {fold}')
print('--------------------------------')
# Create data loaders for the current fold
train_loader = DataLoader(train_dataset, batch_size=batch_size, pin_memory=True, sampler=SubsetRandomSampler(train_ids), num_workers=10)
valid_loader = DataLoader(train_dataset, batch_size=batch_size, pin_memory=True, sampler=SubsetRandomSampler(valid_ids), num_workers=10)
# Initialize the model
model = BirdsCNN(hyperparameters)
early_stop_callback = EarlyStopping(monitor='val_acc', min_delta=0.00 ,patience=8, verbose=True, mode='max')
checkpoint_callback = ModelCheckpoint(dirpath="final_simple_checkpoints/",monitor='val_loss', save_top_k=1, mode='min')
tensorboard_logger = TensorBoardLogger("final_logs", name=f"{tensorboard_name}_fold_{fold}")
trainer = L.Trainer(
default_root_dir="checkpoints_simple_final_trainer/", # Where to save models
# We run on a single GPU (if possible)
accelerator="auto",
devices="auto",
logger=tensorboard_logger,
# How many epochs to train for if no patience is set
max_epochs=hyperparameters["num_epochs"],
callbacks=[
early_stop_callback,
LearningRateMonitor("epoch"),
checkpoint_callback,
], # Log learning rate every epoch
) # In case your notebook crashes due to the progress bar, consider increasing the refresh rate
trainer.logger._log_graph = True # If True, we plot the computation graph in tensorboard
trainer.logger._default_hp_metric = None # Optional logging argument that we don't need
# Train the model ⚡
trainer.fit(model, train_loader, valid_loader)
models.append(model)
# validate the model
# trainer.validate()
# Test the model
trainer.test(model, dataloaders=valid_loader)
trainer.test(model, dataloaders=test_loader)
return models
In [15]:
hyperparameters
Out[15]:
{'learning_rate': 0.0001,
'batch_size': 32,
'input_size': (3, 224, 224),
'number_of_classes': 525,
'num_epochs': 10,
'use_batch_norm_list': True,
'device': 'mps',
'seed': 44444}
Data Preprocessing and Augmentation¶
In [16]:
train_dataset = ImageFolder('train', transform=train_transform)
test_dataset = ImageFolder('test', transform=test_transform)
valid_dataset = ImageFolder('valid', transform=test_transform)
Running K-fold cross validation with the BirdsCNN model¶
In [17]:
# run k-fold cross validation
# running best configuration after K-fold cross validation
batch_size = hyperparameters["batch_size"]
learning_rate = hyperparameters["learning_rate"]
models = k_fold_cross_validation(train_dataset,test_dataset, batch_size ,k=5 , tensorboard_name=f"finakl{batch_size}_lr_{learning_rate}_Kfold")
GPU available: True (mps), used: True TPU available: False, using: 0 TPU cores IPU available: False, using: 0 IPUs HPU available: False, using: 0 HPUs Missing logger folder: final_logs/finakl32_lr_0.0001_Kfold_fold_0
FOLD 0 --------------------------------
/Users/ofekglik/miniconda3/lib/python3.11/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:617: UserWarning: Checkpoint directory final_simple_checkpoints/ exists and is not empty.
rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
| Name | Type | Params
---------------------------------------------
0 | conv1 | Conv2d | 448
1 | conv2 | Conv2d | 4.6 K
2 | conv3 | Conv2d | 18.5 K
3 | conv4 | Conv2d | 73.9 K
4 | conv5 | Conv2d | 295 K
5 | batch_norm1 | BatchNorm2d | 32
6 | batch_norm2 | BatchNorm2d | 64
7 | batch_norm3 | BatchNorm2d | 128
8 | batch_norm4 | BatchNorm2d | 256
9 | batch_norm5 | BatchNorm2d | 512
10 | pool | MaxPool2d | 0
11 | fc1 | Linear | 12.8 M
12 | out | Linear | 538 K
13 | dropout1 | Dropout | 0
14 | dropout2 | Dropout | 0
---------------------------------------------
13.8 M Trainable params
0 Non-trainable params
13.8 M Total params
55.111 Total estimated model params size (MB)
/Users/ofekglik/miniconda3/lib/python3.11/site-packages/lightning/pytorch/loggers/tensorboard.py:198: UserWarning: Could not log computational graph to TensorBoard: The `model.example_input_array` attribute is not set or `input_array` was not given.
rank_zero_warn(
Sanity Checking: 0it [00:00, ?it/s]
Training: 0it [00:00, ?it/s]
Validation: 0it [00:00, ?it/s]
Metric val_acc improved. New best score: 0.115
Validation: 0it [00:00, ?it/s]
Metric val_acc improved by 0.143 >= min_delta = 0.0. New best score: 0.258
Validation: 0it [00:00, ?it/s]
Metric val_acc improved by 0.096 >= min_delta = 0.0. New best score: 0.354
Validation: 0it [00:00, ?it/s]
Metric val_acc improved by 0.071 >= min_delta = 0.0. New best score: 0.425
Validation: 0it [00:00, ?it/s]
Metric val_acc improved by 0.038 >= min_delta = 0.0. New best score: 0.463
Validation: 0it [00:00, ?it/s]
Metric val_acc improved by 0.039 >= min_delta = 0.0. New best score: 0.502
Validation: 0it [00:00, ?it/s]
Metric val_acc improved by 0.018 >= min_delta = 0.0. New best score: 0.520
Validation: 0it [00:00, ?it/s]
Metric val_acc improved by 0.033 >= min_delta = 0.0. New best score: 0.552
Validation: 0it [00:00, ?it/s]
Metric val_acc improved by 0.017 >= min_delta = 0.0. New best score: 0.570
Validation: 0it [00:00, ?it/s]
Metric val_acc improved by 0.012 >= min_delta = 0.0. New best score: 0.582 `Trainer.fit` stopped: `max_epochs=10` reached.
Testing: 0it [00:00, ?it/s]
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ Runningstage.testing ┃ ┃ ┃ metric ┃ DataLoader 0 ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ │ test_acc │ 0.5846871733665466 │ │ test_loss │ 1.8597854375839233 │ └───────────────────────────┴───────────────────────────┘
/Users/ofekglik/miniconda3/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:442: PossibleUserWarning: The dataloader, test_dataloader, does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` (try 10 which is the number of cpus on this machine) in the `DataLoader` init to improve performance. rank_zero_warn(
Testing: 0it [00:00, ?it/s]
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ Runningstage.testing ┃ ┃ ┃ metric ┃ DataLoader 0 ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ │ test_acc │ 0.6880000233650208 │ │ test_loss │ 1.3587158918380737 │ └───────────────────────────┴───────────────────────────┘
GPU available: True (mps), used: True TPU available: False, using: 0 TPU cores IPU available: False, using: 0 IPUs HPU available: False, using: 0 HPUs Missing logger folder: final_logs/finakl32_lr_0.0001_Kfold_fold_1 | Name | Type | Params --------------------------------------------- 0 | conv1 | Conv2d | 448 1 | conv2 | Conv2d | 4.6 K 2 | conv3 | Conv2d | 18.5 K 3 | conv4 | Conv2d | 73.9 K 4 | conv5 | Conv2d | 295 K 5 | batch_norm1 | BatchNorm2d | 32 6 | batch_norm2 | BatchNorm2d | 64 7 | batch_norm3 | BatchNorm2d | 128 8 | batch_norm4 | BatchNorm2d | 256 9 | batch_norm5 | BatchNorm2d | 512 10 | pool | MaxPool2d | 0 11 | fc1 | Linear | 12.8 M 12 | out | Linear | 538 K 13 | dropout1 | Dropout | 0 14 | dropout2 | Dropout | 0 --------------------------------------------- 13.8 M Trainable params 0 Non-trainable params 13.8 M Total params 55.111 Total estimated model params size (MB)
FOLD 1 --------------------------------
Sanity Checking: 0it [00:00, ?it/s]
Training: 0it [00:00, ?it/s]
Validation: 0it [00:00, ?it/s]
Metric val_acc improved. New best score: 0.098
Validation: 0it [00:00, ?it/s]
Metric val_acc improved by 0.140 >= min_delta = 0.0. New best score: 0.238
Validation: 0it [00:00, ?it/s]
Metric val_acc improved by 0.108 >= min_delta = 0.0. New best score: 0.346
Validation: 0it [00:00, ?it/s]
Metric val_acc improved by 0.051 >= min_delta = 0.0. New best score: 0.397
Validation: 0it [00:00, ?it/s]
Metric val_acc improved by 0.037 >= min_delta = 0.0. New best score: 0.434
Validation: 0it [00:00, ?it/s]
Metric val_acc improved by 0.035 >= min_delta = 0.0. New best score: 0.469
Validation: 0it [00:00, ?it/s]
Metric val_acc improved by 0.033 >= min_delta = 0.0. New best score: 0.503
Validation: 0it [00:00, ?it/s]
Metric val_acc improved by 0.020 >= min_delta = 0.0. New best score: 0.523
Validation: 0it [00:00, ?it/s]
Metric val_acc improved by 0.027 >= min_delta = 0.0. New best score: 0.549
Validation: 0it [00:00, ?it/s]
Metric val_acc improved by 0.007 >= min_delta = 0.0. New best score: 0.556 `Trainer.fit` stopped: `max_epochs=10` reached.
Testing: 0it [00:00, ?it/s]
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ Runningstage.testing ┃ ┃ ┃ metric ┃ DataLoader 0 ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ │ test_acc │ 0.5552667379379272 │ │ test_loss │ 2.0222301483154297 │ └───────────────────────────┴───────────────────────────┘
Testing: 0it [00:00, ?it/s]
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ Runningstage.testing ┃ ┃ ┃ metric ┃ DataLoader 0 ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ │ test_acc │ 0.6483809351921082 │ │ test_loss │ 1.5434068441390991 │ └───────────────────────────┴───────────────────────────┘
GPU available: True (mps), used: True TPU available: False, using: 0 TPU cores IPU available: False, using: 0 IPUs HPU available: False, using: 0 HPUs Missing logger folder: final_logs/finakl32_lr_0.0001_Kfold_fold_2 | Name | Type | Params --------------------------------------------- 0 | conv1 | Conv2d | 448 1 | conv2 | Conv2d | 4.6 K 2 | conv3 | Conv2d | 18.5 K 3 | conv4 | Conv2d | 73.9 K 4 | conv5 | Conv2d | 295 K 5 | batch_norm1 | BatchNorm2d | 32 6 | batch_norm2 | BatchNorm2d | 64 7 | batch_norm3 | BatchNorm2d | 128 8 | batch_norm4 | BatchNorm2d | 256 9 | batch_norm5 | BatchNorm2d | 512 10 | pool | MaxPool2d | 0 11 | fc1 | Linear | 12.8 M 12 | out | Linear | 538 K 13 | dropout1 | Dropout | 0 14 | dropout2 | Dropout | 0 --------------------------------------------- 13.8 M Trainable params 0 Non-trainable params 13.8 M Total params 55.111 Total estimated model params size (MB)
FOLD 2 --------------------------------
Sanity Checking: 0it [00:00, ?it/s]
Training: 0it [00:00, ?it/s]
Validation: 0it [00:00, ?it/s]
Metric val_acc improved. New best score: 0.077
Validation: 0it [00:00, ?it/s]
Metric val_acc improved by 0.134 >= min_delta = 0.0. New best score: 0.211
Validation: 0it [00:00, ?it/s]
Metric val_acc improved by 0.090 >= min_delta = 0.0. New best score: 0.301
Validation: 0it [00:00, ?it/s]
Metric val_acc improved by 0.078 >= min_delta = 0.0. New best score: 0.379
Validation: 0it [00:00, ?it/s]
Metric val_acc improved by 0.039 >= min_delta = 0.0. New best score: 0.418
Validation: 0it [00:00, ?it/s]
Metric val_acc improved by 0.033 >= min_delta = 0.0. New best score: 0.451
Validation: 0it [00:00, ?it/s]
Metric val_acc improved by 0.026 >= min_delta = 0.0. New best score: 0.477
Validation: 0it [00:00, ?it/s]
Metric val_acc improved by 0.029 >= min_delta = 0.0. New best score: 0.507
Validation: 0it [00:00, ?it/s]
Metric val_acc improved by 0.021 >= min_delta = 0.0. New best score: 0.527
Validation: 0it [00:00, ?it/s]
Metric val_acc improved by 0.014 >= min_delta = 0.0. New best score: 0.542 `Trainer.fit` stopped: `max_epochs=10` reached.
Testing: 0it [00:00, ?it/s]
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ Runningstage.testing ┃ ┃ ┃ metric ┃ DataLoader 0 ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ │ test_acc │ 0.5424469709396362 │ │ test_loss │ 2.073704957962036 │ └───────────────────────────┴───────────────────────────┘
Testing: 0it [00:00, ?it/s]
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ Runningstage.testing ┃ ┃ ┃ metric ┃ DataLoader 0 ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ │ test_acc │ 0.6369524002075195 │ │ test_loss │ 1.590436339378357 │ └───────────────────────────┴───────────────────────────┘
GPU available: True (mps), used: True TPU available: False, using: 0 TPU cores IPU available: False, using: 0 IPUs HPU available: False, using: 0 HPUs Missing logger folder: final_logs/finakl32_lr_0.0001_Kfold_fold_3 | Name | Type | Params --------------------------------------------- 0 | conv1 | Conv2d | 448 1 | conv2 | Conv2d | 4.6 K 2 | conv3 | Conv2d | 18.5 K 3 | conv4 | Conv2d | 73.9 K 4 | conv5 | Conv2d | 295 K 5 | batch_norm1 | BatchNorm2d | 32 6 | batch_norm2 | BatchNorm2d | 64 7 | batch_norm3 | BatchNorm2d | 128 8 | batch_norm4 | BatchNorm2d | 256 9 | batch_norm5 | BatchNorm2d | 512 10 | pool | MaxPool2d | 0 11 | fc1 | Linear | 12.8 M 12 | out | Linear | 538 K 13 | dropout1 | Dropout | 0 14 | dropout2 | Dropout | 0 --------------------------------------------- 13.8 M Trainable params 0 Non-trainable params 13.8 M Total params 55.111 Total estimated model params size (MB)
FOLD 3 --------------------------------
Sanity Checking: 0it [00:00, ?it/s]
Training: 0it [00:00, ?it/s]
Validation: 0it [00:00, ?it/s]
Metric val_acc improved. New best score: 0.091
Validation: 0it [00:00, ?it/s]
Metric val_acc improved by 0.128 >= min_delta = 0.0. New best score: 0.219
Validation: 0it [00:00, ?it/s]
Metric val_acc improved by 0.091 >= min_delta = 0.0. New best score: 0.310
Validation: 0it [00:00, ?it/s]
Metric val_acc improved by 0.070 >= min_delta = 0.0. New best score: 0.381
Validation: 0it [00:00, ?it/s]
Metric val_acc improved by 0.048 >= min_delta = 0.0. New best score: 0.429
Validation: 0it [00:00, ?it/s]
Metric val_acc improved by 0.044 >= min_delta = 0.0. New best score: 0.473
Validation: 0it [00:00, ?it/s]
Metric val_acc improved by 0.029 >= min_delta = 0.0. New best score: 0.502
Validation: 0it [00:00, ?it/s]
Metric val_acc improved by 0.023 >= min_delta = 0.0. New best score: 0.525
Validation: 0it [00:00, ?it/s]
Metric val_acc improved by 0.027 >= min_delta = 0.0. New best score: 0.552
Validation: 0it [00:00, ?it/s]
Metric val_acc improved by 0.011 >= min_delta = 0.0. New best score: 0.563 `Trainer.fit` stopped: `max_epochs=10` reached.
Testing: 0it [00:00, ?it/s]
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ Runningstage.testing ┃ ┃ ┃ metric ┃ DataLoader 0 ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ │ test_acc │ 0.5657234191894531 │ │ test_loss │ 1.9477201700210571 │ └───────────────────────────┴───────────────────────────┘
Testing: 0it [00:00, ?it/s]
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ Runningstage.testing ┃ ┃ ┃ metric ┃ DataLoader 0 ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ │ test_acc │ 0.6632381081581116 │ │ test_loss │ 1.4862173795700073 │ └───────────────────────────┴───────────────────────────┘
GPU available: True (mps), used: True TPU available: False, using: 0 TPU cores IPU available: False, using: 0 IPUs HPU available: False, using: 0 HPUs Missing logger folder: final_logs/finakl32_lr_0.0001_Kfold_fold_4 | Name | Type | Params --------------------------------------------- 0 | conv1 | Conv2d | 448 1 | conv2 | Conv2d | 4.6 K 2 | conv3 | Conv2d | 18.5 K 3 | conv4 | Conv2d | 73.9 K 4 | conv5 | Conv2d | 295 K 5 | batch_norm1 | BatchNorm2d | 32 6 | batch_norm2 | BatchNorm2d | 64 7 | batch_norm3 | BatchNorm2d | 128 8 | batch_norm4 | BatchNorm2d | 256 9 | batch_norm5 | BatchNorm2d | 512 10 | pool | MaxPool2d | 0 11 | fc1 | Linear | 12.8 M 12 | out | Linear | 538 K 13 | dropout1 | Dropout | 0 14 | dropout2 | Dropout | 0 --------------------------------------------- 13.8 M Trainable params 0 Non-trainable params 13.8 M Total params 55.111 Total estimated model params size (MB)
FOLD 4 --------------------------------
Sanity Checking: 0it [00:00, ?it/s]
Training: 0it [00:00, ?it/s]
Validation: 0it [00:00, ?it/s]
Metric val_acc improved. New best score: 0.072
Validation: 0it [00:00, ?it/s]
Metric val_acc improved by 0.135 >= min_delta = 0.0. New best score: 0.207
Validation: 0it [00:00, ?it/s]
Metric val_acc improved by 0.088 >= min_delta = 0.0. New best score: 0.295
Validation: 0it [00:00, ?it/s]
Metric val_acc improved by 0.054 >= min_delta = 0.0. New best score: 0.349
Validation: 0it [00:00, ?it/s]
Metric val_acc improved by 0.050 >= min_delta = 0.0. New best score: 0.399
Validation: 0it [00:00, ?it/s]
Metric val_acc improved by 0.040 >= min_delta = 0.0. New best score: 0.439
Validation: 0it [00:00, ?it/s]
Metric val_acc improved by 0.021 >= min_delta = 0.0. New best score: 0.460
Validation: 0it [00:00, ?it/s]
Metric val_acc improved by 0.015 >= min_delta = 0.0. New best score: 0.475
Validation: 0it [00:00, ?it/s]
Metric val_acc improved by 0.035 >= min_delta = 0.0. New best score: 0.510
Validation: 0it [00:00, ?it/s]
Metric val_acc improved by 0.015 >= min_delta = 0.0. New best score: 0.526 `Trainer.fit` stopped: `max_epochs=10` reached.
Testing: 0it [00:00, ?it/s]
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ Runningstage.testing ┃ ┃ ┃ metric ┃ DataLoader 0 ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ │ test_acc │ 0.528032124042511 │ │ test_loss │ 2.1339237689971924 │ └───────────────────────────┴───────────────────────────┘
Testing: 0it [00:00, ?it/s]
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ Runningstage.testing ┃ ┃ ┃ metric ┃ DataLoader 0 ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ │ test_acc │ 0.6380952596664429 │ │ test_loss │ 1.576202392578125 │ └───────────────────────────┴───────────────────────────┘
In [20]:
chosen_model = models[0]
In [21]:
test_dataset = ImageFolder('test', transform=test_transform)
test_loader = DataLoader(test_dataset, batch_size=batch_size, pin_memory=True, shuffle=False)
In [22]:
def plot_top_confidence_misclassified_images(model, loader, top_k=5):
model.eval()
misclassified_images = []
with torch.no_grad():
for images, labels in loader:
outputs = model(images)
probabilities = torch.softmax(outputs, dim=1)
confidences, predictions = torch.max(probabilities, dim=1)
for i in range(images.size(0)):
if predictions[i] != labels[i]: # Check for misclassification
misclassified_images.append((images[i], labels[i], predictions[i], confidences[i].item()))
labels = []
# Sort by confidence score in descending order and keep top k
misclassified_images.sort(key=lambda x: x[3], reverse=True)
top_misclassified_images = misclassified_images[:top_k]
# Plotting
fig, axes = plt.subplots(nrows=1, ncols=top_k, figsize=(20, 4), subplot_kw={'xticks': [], 'yticks': []})
for ax, (image, true_label, predicted_label, confidence) in zip(axes, top_misclassified_images):
image = image.permute(1, 2, 0) # Convert from (C, H, W) to (H, W, C) if necessary
image = image / 2 + 0.5 # Unnormalize
labels.append(predicted_label)
ax.imshow(image.cpu().numpy())
ax.set_title(f'True: {true_label}\nPred: {predicted_label}\nConf: {confidence:.2f}')
plt.tight_layout()
plt.show()
return labels
missing_labels = plot_top_confidence_misclassified_images(chosen_model, test_loader)
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
In [23]:
def show_images_side_by_side(loader, labels_to_show):
images_to_plot = []
labels_found = []
for images, labels in loader:
for i, label in enumerate(labels):
if label.item() in labels_to_show:
image = images[i].permute(1, 2, 0)
images_to_plot.append(image)
labels_found.append(label.item())
# Remove found label from the list to show
labels_to_show.remove(label.item())
# Break out of the loop if all labels have been found
if not labels_to_show:
break
if not labels_to_show:
break
# Plotting all found images side by side
num_images = len(images_to_plot)
if num_images > 0:
plt.figure(figsize=(num_images * 5, 5)) # Adjust figure size dynamically based on the number of images
for i, (image, label) in enumerate(zip(images_to_plot, labels_found)):
plt.subplot(1, num_images, i + 1)
image = image / 2 + 0.5 # Unnormalize
# convert bgr to rgb
image = image.cpu().numpy()
plt.imshow(image)
plt.title(f'Label: {label}')
plt.axis('off') # Hide axis for better visualization
plt.show()
else:
print("No images found for the specified labels.")
show_images_side_by_side(test_loader, missing_labels)
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
In [27]:
#torch.save(chosen_model, 'final_model.pkl')
In [26]:
augmentations = [None,
transforms.RandomHorizontalFlip(),
transforms.RandomVerticalFlip(),
transforms.RandomRotation(90),
transforms.RandomRotation(180),
transforms.RandomRotation(270),
]
predictions = []
chosen_model.eval()
labels = []
with torch.no_grad():
for i in range(len(augmentations)):
tmp_predictions = []
temp_labels = []
for images, labels in test_loader:
if augmentations[i]:
images = augmentations[i](images)
outputs = chosen_model(images)
_, predicted = torch.max(outputs, 1)
tmp_predictions.append(predicted)
temp_labels.append(labels)
predictions.append(torch.cat(tmp_predictions, 0))
labels = torch.cat(temp_labels, 0)
predictions = torch.stack(predictions)
predictions_mode = torch.mode(predictions, 0)
correct = (predictions_mode.values == labels).sum().item()
acc = correct / (len(labels))
print(f"Test accuracy after augmentations: {acc}")
Test accuracy after augmentations: 0.632
In [31]:
class WhiteWagtailDataset(datasets.DatasetFolder):
def __init__(self, root, transform=None, loader=default_loader):
super(WhiteWagtailDataset, self).__init__(root, loader, extensions=('.jpg'), transform=transform)
self.target = 'White-Wagtail'
self.imgs = [os.path.join(root, os.path.join(self.target, img)) for img in os.listdir(f'{root}/{self.target}')]
def __getitem__(self, index):
path = self.imgs[index]
sample = self.loader(path)
if self.transform is not None:
sample = self.transform(sample)
return sample, self.target
def __len__(self):
return len(self.imgs)
In [32]:
train_dir = "train"
test_dir = "valid"
# Create an instance of the WhiteWagtailDataset class
white_wagtail_train_dataset = WhiteWagtailDataset(train_dir, transform=train_transform)
white_wagtail_test_dataset = WhiteWagtailDataset(test_dir, transform=test_transform)
In [33]:
train_dataset_size = len(white_wagtail_train_dataset)
train_indices, val_indices = train_test_split(range(train_dataset_size), test_size=0.25, random_state=42)
white_wagtail_train_subset = Subset(train_dataset, train_indices)
white_wagtail_val_subset = Subset(train_dataset, val_indices)
In [34]:
concatenated_train_dataset = ConcatDataset([white_wagtail_train_subset, train_dataset])
concatenated_valid_dataset = ConcatDataset([white_wagtail_val_subset, valid_dataset])
concatenated_test_dataset = ConcatDataset([white_wagtail_test_dataset, test_dataset])
In [35]:
concatenated_train_loader = DataLoader(concatenated_train_dataset, batch_size=hyperparameters['batch_size'], shuffle=True, num_workers=10)
concatenated_valid_loader = DataLoader(concatenated_valid_dataset, batch_size=hyperparameters['batch_size'], shuffle=True, num_workers=10)
concatenated_test_loader = DataLoader(concatenated_test_dataset, batch_size=hyperparameters['batch_size'], shuffle=True, num_workers=10)
In [43]:
def train_model(hyperparameters, train_loader, valid_loader):
# Create a PyTorch Lightning trainer with the generation callback
trainer = L.Trainer(
# default_root_dir=os.path.join(CHECKPOINT_PATH, model_name), # Where to save models
# We run on a single GPU (if possible)
accelerator="auto",
devices=1,
# How many epochs to train for if no patience is set
max_epochs=hyperparameters['num_epochs'],
callbacks=[
ModelCheckpoint(
save_weights_only=True, mode="max", monitor="val_acc"
), # Save the best checkpoint based on the maximum val_acc recorded. Saves only weights and not optimizer
LearningRateMonitor("epoch"),
], # Log learning rate every epoch
) # In case your notebook crashes due to the progress bar, consider increasing the refresh rate
L.seed_everything(42) # To be reproducable
model = BirdsCNN(hyperparameters)
trainer.fit(model, train_loader, valid_loader)
return model
Train the model with the White Wagtail dataset for 3 epochs¶
In [44]:
hyperparameters["number_of_classes"] = 526
hyperparameters["num_epochs"] = 3
In [46]:
model = train_model(hyperparameters, concatenated_train_loader, concatenated_valid_loader)
GPU available: True (mps), used: True TPU available: False, using: 0 TPU cores IPU available: False, using: 0 IPUs HPU available: False, using: 0 HPUs Global seed set to 42 | Name | Type | Params --------------------------------------------- 0 | conv1 | Conv2d | 448 1 | conv2 | Conv2d | 4.6 K 2 | conv3 | Conv2d | 18.5 K 3 | conv4 | Conv2d | 73.9 K 4 | conv5 | Conv2d | 295 K 5 | batch_norm1 | BatchNorm2d | 32 6 | batch_norm2 | BatchNorm2d | 64 7 | batch_norm3 | BatchNorm2d | 128 8 | batch_norm4 | BatchNorm2d | 256 9 | batch_norm5 | BatchNorm2d | 512 10 | pool | MaxPool2d | 0 11 | fc1 | Linear | 12.8 M 12 | out | Linear | 539 K 13 | dropout1 | Dropout | 0 14 | dropout2 | Dropout | 0 --------------------------------------------- 13.8 M Trainable params 0 Non-trainable params 13.8 M Total params 55.115 Total estimated model params size (MB)
Sanity Checking: 0it [00:00, ?it/s]
/Users/ofekglik/miniconda3/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:490: PossibleUserWarning: Your `val_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders. rank_zero_warn(
Training: 0it [00:00, ?it/s]
Validation: 0it [00:00, ?it/s]
Validation: 0it [00:00, ?it/s]
Validation: 0it [00:00, ?it/s]
`Trainer.fit` stopped: `max_epochs=3` reached.
In [47]:
model
Out[47]:
BirdsCNN( (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (conv4): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (conv5): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (batch_norm1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (batch_norm2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (batch_norm3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (batch_norm4): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (batch_norm5): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) (fc1): Linear(in_features=12544, out_features=1024, bias=True) (out): Linear(in_features=1024, out_features=526, bias=True) (dropout1): Dropout(p=0.3, inplace=False) (dropout2): Dropout(p=0.5, inplace=False) )
In [ ]: